{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run tools.ipynb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### BaseFeaturizer and BaseDescriptor "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- import necessary libraries\n",
    "from xenonpy.descriptor.base import BaseDescriptor, BaseFeaturizer\n",
    "\n",
    "# --- Featurizer ---\n",
    "class RadialDistributionFunction_(BaseFeaturizer):\n",
    "\n",
    "    @property\n",
    "    def feature_labels(self):\n",
    "        return [str(d) for d in self._interval[1:]]\n",
    "\n",
    "    def __init__(self, n_bins=201, r_max=20.0, *, n_jobs=-1, on_errors='raise', return_type='any'):\n",
    "\n",
    "        super().__init__(n_jobs=n_jobs, on_errors=on_errors, return_type=return_type)\n",
    "\n",
    "        self.n_bins = n_bins\n",
    "        self.r_max = r_max\n",
    "        self.dr = r_max / (n_bins - 1)\n",
    "        self._interval = np.arange(0.0, r_max + self.dr, self.dr)\n",
    "        self.__authors__ = ['TsumiNa']\n",
    "\n",
    "    def featurize(self, structure):\n",
    "\n",
    "        # Get the distances between all atoms\n",
    "        neighbors_lst = structure.get_all_neighbors(self.r_max)\n",
    "        all_distances = np.concatenate(tuple(map(lambda x: [e[1] for e in x], neighbors_lst)))\n",
    "\n",
    "        # Compute a histogram\n",
    "        dist_hist, dist_bins = np.histogram(all_distances, bins=self._interval, density=False)\n",
    "\n",
    "        # Normalize counts\n",
    "        shell_vol = 4.0 / 3.0 * np.pi * (np.power(dist_bins[1:], 3) - np.power(dist_bins[:-1], 3))\n",
    "        number_density = structure.num_sites / structure.volume\n",
    "        return dist_hist / shell_vol / number_density\n",
    "\n",
    "# --- Descriptor ---\n",
    "class Compositions_(BaseDescriptor):\n",
    "\n",
    "    def __init__(self, elements=None, *, n_jobs=-1, include=None,\n",
    "                 exclude=None, on_errors='raise'):\n",
    "        super().__init__()\n",
    "        self.n_jobs = n_jobs\n",
    "\n",
    "        self.composition = WeightedAvgFeature(elements, n_jobs=n_jobs, on_errors=on_errors)\n",
    "        self.composition = WeightedSumFeature(elements, n_jobs=n_jobs, on_errors=on_errors)\n",
    "        self.composition = WeightedVarFeature(elements, n_jobs=n_jobs, on_errors=on_errors)\n",
    "        self.composition = MaxFeature(elements, n_jobs=n_jobs, on_errors=on_errors)\n",
    "        self.composition = MinFeature(elements, n_jobs=n_jobs, on_errors=on_errors)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### build your own"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- import necessary libraries\n",
    "\n",
    "from xenonpy.descriptor.base import BaseDescriptor, BaseFeaturizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class YourFeaturier1(BaseFeaturizer):\n",
    "\n",
    "    def __init__(self, n_jobs=1):\n",
    "        super().__init__(n_jobs=n_jobs)\n",
    "\n",
    "    def featurize(self, x):\n",
    "        return x * 10\n",
    "\n",
    "    @property\n",
    "    def feature_labels(self):\n",
    "        return ['times']\n",
    "\n",
    "class YourFeaturier2(BaseFeaturizer):\n",
    "\n",
    "    def __init__(self, n_jobs=1):\n",
    "        super().__init__(n_jobs=n_jobs)\n",
    "\n",
    "    def featurize(self, x):\n",
    "        return x ** 2\n",
    "\n",
    "    @property\n",
    "    def feature_labels(self):\n",
    "        return ['power']    \n",
    "\n",
    "class YourDescriptor(BaseDescriptor):\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.f1 = YourFeaturier1()\n",
    "        self.f2 = YourFeaturier2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_data1 = np.array([1, 2, 3, 4, 5])\n",
    "test_data2 = np.array([6, 7, 8, 9, 10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>times</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   times\n",
       "0     10\n",
       "1     20\n",
       "2     30\n",
       "3     40\n",
       "4     50"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "yf1 = YourFeaturier1()\n",
    "yf1.fit_transform(test_data1, return_type='df')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   power\n",
       "0     36\n",
       "1     49\n",
       "2     64\n",
       "3     81\n",
       "4    100"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "yf2 = YourFeaturier2()\n",
    "yf2.fit_transform(test_data2, return_type='df')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>f1</th>\n",
       "      <th>f2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   f1  f2\n",
       "0   1   6\n",
       "1   2   7\n",
       "2   3   8\n",
       "3   4   9\n",
       "4   5  10"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_data = pd.DataFrame({'f1': test_data1, 'f2': test_data2})\n",
    "test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>times</th>\n",
       "      <th>power</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>40</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>50</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   times  power\n",
       "0     10     36\n",
       "1     20     49\n",
       "2     30     64\n",
       "3     40     81\n",
       "4     50    100"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "yd = YourDescriptor()\n",
    "yd.fit_transform(test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
